{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([[0, 0, 0, ..., 0, 0, 0],\n",
       "        [0, 0, 0, ..., 0, 0, 0],\n",
       "        [0, 0, 0, ..., 0, 0, 0],\n",
       "        ...,\n",
       "        [0, 0, 0, ..., 0, 0, 0],\n",
       "        [0, 0, 0, ..., 0, 0, 0],\n",
       "        [0, 0, 0, ..., 0, 0, 0]]),\n",
       " array([0, 0, 0, ..., 9, 9, 9]),\n",
       " (1934, 1024),\n",
       " (1934,),\n",
       " (946, 1024),\n",
       " (946,))"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "import os\n",
    "\n",
    "\n",
    "#加载数据\n",
    "def load_data(folder):\n",
    "    #遍历所有文件\n",
    "    files = os.listdir(folder)\n",
    "\n",
    "    #存储数据\n",
    "    x = np.empty((len(files), 1024), dtype=int)\n",
    "    y = np.empty(len(files), dtype=int)\n",
    "\n",
    "    for i in range(len(files)):\n",
    "        with open('手写数字_训练/' + files[i]) as fr:\n",
    "            lines = fr.readlines()\n",
    "\n",
    "        #读取一个文件,转为1024维的向量\n",
    "        x_i = []\n",
    "        for line in lines:\n",
    "            line = line.strip()\n",
    "            x_i.extend(list(line))\n",
    "\n",
    "        #存储到x中\n",
    "        x[i] = np.array(x_i, dtype=int)\n",
    "\n",
    "        #从文件名里解析y\n",
    "        y[i] = files[i].split('_')[0]\n",
    "    return x, y\n",
    "\n",
    "\n",
    "x, y = load_data('手写数字_训练')\n",
    "test_x, test_y = load_data('手写数字_测试')\n",
    "\n",
    "x, y, x.shape, y.shape, test_x.shape, test_y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def knn(_x, k=3):\n",
    "    temp = _x - x\n",
    "    temp = np.power(temp, 2)\n",
    "    temp = temp.sum(axis=1)\n",
    "    temp = np.sqrt(temp)\n",
    "    argsort = temp.argsort()\n",
    "\n",
    "    result = y[argsort][:k]\n",
    "    return np.bincount(result).argmax()\n",
    "\n",
    "\n",
    "knn(test_x[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9735729386892178\n"
     ]
    }
   ],
   "source": [
    "correct = 0\n",
    "for i in range(len(test_x)):\n",
    "    pred = knn(test_x[i], k=9)\n",
    "    #print(pred, test_y[i])\n",
    "\n",
    "    if pred == test_y[i]:\n",
    "        correct += 1\n",
    "\n",
    "print(correct / len(test_x))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
